{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# High-level Chainer Example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "import numpy as np\n",
    "import math\n",
    "import chainer\n",
    "import chainer.functions as F\n",
    "import chainer.links as L\n",
    "from chainer import optimizers\n",
    "from chainer import cuda\n",
    "from common.params import *\n",
    "from common.utils import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Performance Improvement\n",
    "# 1. Auto-tune\n",
    "# This adds very little now .. not sure if True by default?\n",
    "chainer.global_config.autotune = True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Force one-gpu\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "OS:  linux\n",
      "Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) \n",
      "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n",
      "Chainer:  3.4.0\n",
      "CuPy:  2.4.0\n",
      "Numpy:  1.14.1\n",
      "GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']\n",
      "CUDA Version 8.0.61\n",
      "CuDNN Version  6.0.21\n"
     ]
    }
   ],
   "source": [
    "print(\"OS: \", sys.platform)\n",
    "print(\"Python: \", sys.version)\n",
    "print(\"Chainer: \", chainer.__version__)\n",
    "print(\"CuPy: \", chainer.cuda.cupy.__version__)\n",
    "print(\"Numpy: \", np.__version__)\n",
    "print(\"GPU: \", get_gpu_name())\n",
    "print(get_cuda_version())\n",
    "print(\"CuDNN Version \", get_cudnn_version())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "class SymbolModule(chainer.Chain):\n",
    "    def __init__(self, n_classes=N_CLASSES):\n",
    "        super(SymbolModule, self).__init__()\n",
    "        with self.init_scope():\n",
    "            self.conv1 = L.Convolution2D(3, 50, ksize=3, pad=1)\n",
    "            self.conv2 = L.Convolution2D(50, 50, ksize=3, pad=1)\n",
    "            self.conv3 = L.Convolution2D(50, 100, ksize=3, pad=1)\n",
    "            self.conv4 = L.Convolution2D(100, 100, ksize=3, pad=1)\n",
    "            # feature map size is 8*8 by pooling\n",
    "            self.fc1 = L.Linear(100*8*8, 512)\n",
    "            self.fc2 = L.Linear(512, n_classes)\n",
    "    \n",
    "    def __call__(self, x):\n",
    "        h = self.conv2(F.relu(self.conv1(x)))\n",
    "        h = F.relu(F.max_pooling_2d(h, ksize=2, stride=2))\n",
    "        h = F.dropout(h, 0.25)\n",
    "        \n",
    "        h = self.conv4(F.relu(self.conv3(h)))\n",
    "        h = F.relu(F.max_pooling_2d(h, ksize=2, stride=2))\n",
    "        h = F.dropout(h, 0.25)       \n",
    "        \n",
    "        h = F.dropout(F.relu(self.fc1(h)), 0.5)\n",
    "        return self.fc2(h)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def init_model(m, lr=LR, momentum=MOMENTUM):\n",
    "    optimizer = optimizers.MomentumSGD(lr, momentum)\n",
    "    optimizer.setup(m)\n",
    "    return optimizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Preparing train set...\n",
      "Preparing test set...\n",
      "(50000, 3, 32, 32) (10000, 3, 32, 32) (50000,) (10000,)\n",
      "float32 float32 int32 int32\n",
      "CPU times: user 605 ms, sys: 612 ms, total: 1.22 s\n",
      "Wall time: 1.22 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Data into format for library\n",
    "x_train, x_test, y_train, y_test = cifar_for_library(channel_first=True)\n",
    "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n",
    "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 216 ms, sys: 132 ms, total: 349 ms\n",
      "Wall time: 348 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Create symbol\n",
    "sym = SymbolModule()\n",
    "chainer.cuda.get_device(0).use()  # Make a specified GPU current\n",
    "sym.to_gpu()  # Copy the model to the GPU"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 115 µs, sys: 0 ns, total: 115 µs\n",
      "Wall time: 119 µs\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "optimizer = init_model(sym)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "1\n",
      "2\n",
      "3\n",
      "4\n",
      "5\n",
      "6\n",
      "7\n",
      "8\n",
      "9\n",
      "CPU times: user 1min 7s, sys: 1.61 s, total: 1min 8s\n",
      "Wall time: 1min 9s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Main training loop: 69s\n",
    "for j in range(EPOCHS):\n",
    "    for data, target in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):\n",
    "        # Get samples\n",
    "        data = cuda.to_gpu(data)\n",
    "        target = cuda.to_gpu(target)\n",
    "        # Forwards\n",
    "        output = sym(data)\n",
    "        # Loss\n",
    "        loss = F.softmax_cross_entropy(output, target)\n",
    "        sym.cleargrads()\n",
    "        # Back-prop\n",
    "        loss.backward()\n",
    "        optimizer.update()\n",
    "    # Log\n",
    "    print(j)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 466 ms, sys: 0 ns, total: 466 ms\n",
      "Wall time: 466 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Main evaluation loop: 800ms\n",
    "n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE\n",
    "y_guess = np.zeros(n_samples, dtype=np.int)\n",
    "y_truth = y_test[:n_samples]\n",
    "c = 0\n",
    "with chainer.using_config('train', False), chainer.using_config('enable_backprop', False):\n",
    "    for data, target in yield_mb(x_test, y_test, BATCHSIZE):\n",
    "        # Forwards\n",
    "        pred = cuda.to_cpu(sym(cuda.to_gpu(data)).data.argmax(-1))\n",
    "        # Collect results\n",
    "        y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = pred\n",
    "        c += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy:  0.7901642628205128\n"
     ]
    }
   ],
   "source": [
    "print(\"Accuracy: \", 1.*sum(y_guess == y_truth)/len(y_guess))"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
